This study uses Nihommatsu city and Mimanisoma City as case studies. The two cities are 70km and 30km from Fukushima Daichi Plant respectively.
This research seeks to apply several machine learning algorithms to estimate the rate of reduction of Fukushima Air Dose with respect to variations in population density, land use and distance to Fukushima Nuclear Plant.
From Linear Model \[Y{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2} X_{2i} + \beta_{3} X_{1i} X_{2i} + \epsilon_{i}\]
Hypothesis \[AvgAirDoseRate = \beta_{0} + \beta_{1} daichi distance + \beta_{2} popn density + \beta_{3} human activities + \beta_{4}(popn density)(human activities) + \epsilon_{i}\]
1.11 Required Packages
library("leaflet")
library(readr)
library(dplyr)
library(RColorBrewer)
library(Hmisc)
library(ggplot2)
library(formatR)
NB:Important Notes from the Nuclear Regulation Authority on JAEA website
Purposes of this vehicle survey were:
##### 1.11 Loading June 2011 Fukushima Data and selecting Nihomatsu’s.
niho <- read_csv("jun_2011_fukushima.csv")
niho2013 <- read_csv("nov_2013_fukushima.csv")
# View(niho2013)
# names(niho)
dim(niho)
## [1] 45273 18
# head(niho)
class(niho)
## [1] "tbl_df" "tbl" "data.frame"
names(niho) <- c("gridcode", "pref", "city", "gridCenterNorthlat",
"gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec",
"daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat",
"NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong",
"SE_nLat", "SE_eLong")
names(niho2013) <- c("gridcode", "pref", "city", "gridCenterNorthlat",
"gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec",
"daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat",
"NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong",
"SE_nLat", "SE_eLong")
# Strip Nihommatsu city,
niho$city[niho$city == "Nihommatsu city"] <- "nihommatsu"
niho2013$city[niho2013$city == "Nihommatsu city"] <- "nihommatsu"
nihom <- subset(niho, city == "nihommatsu")
nihom2013 <- subset(niho2013, city == "nihommatsu")
dim(nihom)
## [1] 2944 18
dim(nihom2013)
## [1] 11758 18
class(nihom)
## [1] "tbl_df" "data.frame"
niho_q <- nihom %>% mutate(dose_quants = cut2(nihom$AvgAirDoseRate,
cuts = c(0.1, 0.5, 1, 1.5, 2, 2.5, 3), levels.mean = TRUE))
niho_q <- na.omit(niho_q)
# write_csv(niho_q, path = 'niho_q.csv')
nihom2013_q <- nihom2013 %>% mutate(dose_quants = cut2(nihom2013$AvgAirDoseRate,
cuts = seq(0.06, 1.6, 0.25), levels.mean = TRUE))
summary(nihom2013$AvgAirDoseRate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0630 0.2800 0.3700 0.4009 0.4900 1.4000
summary(niho_q$AvgAirDoseRate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.190 0.710 1.000 1.029 1.300 2.500
iro <- colorFactor(palette = "YlOrRd", domain = niho_q$dose_quants)
iro2013 <- colorFactor(palette = "YlOrRd", domain = nihom2013_q$dose_quants)
# Link of Daichi
fukulink <- paste(sep = "<br/>", "<br><a href='http://www.tepco.co.jp
/en/decommision/index-e.html'>Fukushima Daichi</a></b>",
"Source of radiations")
niho_plot <- leaflet() %>% addTiles() %>% addRectangles(data = niho_q,
lng1 = ~SW_eLong, lat1 = ~SW_nLat, lng2 = ~NE_eLong, lat2 = ~NE_nLat,
color = ~iro(niho_q$dose_quants)) %>% addLegend("bottomright",
pal = iro, values = niho_q$dose_quants, title = "AvgAirDoseRate",
labFormat = labelFormat(prefix = "µSv/h "), opacity = 1) %>%
addPopups(lat = 37.4211, lng = 141.0328, popup = fukulink,
options = popupOptions(closeButton = TRUE))
niho_plot
niho2013_plot <- leaflet() %>%
addTiles()%>%
addRectangles(data = nihom2013_q,lng1 = ~SW_eLong, lat1 = ~SW_nLat,
lng2 = ~NE_eLong, lat2 = ~NE_nLat,
color = ~iro2013(nihom2013_q$dose_quants))%>%
addLegend("bottomright", pal = iro2013, values = nihom2013_q$dose_quants,
title = "AvgAirDoseRate",
labFormat = labelFormat(prefix = "µSv/h "),
opacity = 1)%>%
addPopups(lat = 37.4211, lng = 141.0328,popup = fukulink,
options = popupOptions(closeButton = TRUE))
niho2013_plot
ggplot(niho_q, aes(daichi_distance,AvgAirDoseRate)) +
geom_point() +
geom_smooth(se = FALSE)+
ggtitle("AvgAirDose against Distance to Daichi Plant")
ggplot(data = niho_q) +
geom_bar(mapping = aes(x = daichi_distance, fill = dose_quants), width = 1)+
ggtitle("AvgAirDose Measured Counts against Daichi Distance")
mina <- read_csv("niho.csv")
dim(mina)
## [1] 45273 18
#change Minamisoma city to minamisoma
mina$city[mina$city == "Minamisoma city"] <- "minamisoma"
#filter nihomatsu observations only
mina <- subset(mina, city == "minamisoma")
dim(mina)
## [1] 1985 18
summary(mina$AvgAirDoseRate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.210 0.520 0.780 1.771 1.800 18.000
mina_q <- mina %>% mutate(mina_quants = cut2(mina$AvgAirDoseRate,
cuts = seq(0.2, 20, 2.2), levels.mean = TRUE))
mina_q <- na.omit(mina_q) #remove NAs
write_csv(mina_q, path = "mina_q.csv")
mina_iro <- colorFactor(
palette = "Set1",
domain = mina_q$mina_quants
)
mina_plot <- leaflet() %>% addTiles() %>% addRectangles(data = mina_q,
lng1 = ~SW_eLong, lat1 = ~SW_nLat, lng2 = ~NE_eLong, lat2 = ~NE_nLat,
color = ~mina_iro(mina_q$mina_quants)) %>% addLegend("bottomright",
pal = mina_iro, values = mina_q$mina_quants, title = "AvgAirDoseRate",
labFormat = labelFormat(prefix = "µSv/h "), opacity = 1) %>%
addPopups(lat = 37.4211, lng = 141.0328, popup = fukulink,
options = popupOptions(closeButton = TRUE))
mina_plot
ggplot(mina_q, aes(daichi_distance,AvgAirDoseRate)) +
geom_point() +
geom_smooth(se = FALSE)+
ggtitle("AvgAirDose against Distance to Daichi Plant")
ggplot(data = mina_q) +
geom_bar(mapping = aes(x = daichi_distance, fill = mina_quants), width = 1)+
ggtitle("Counts of Measuring Locations per Air Dose Rate")
library(ggplot2)
niho_q <- read_csv("niho_q.csv")
niho2013_q <- read_csv("niho2013.csv")
names(niho_q) <- c("gridcode", "pref", "city", "gridCenterNorthlat", "gridCenterEastlng",
"gridCenterNorthlatDec", "gridCenterEastlngDec", "daichi_distance", "no_samples",
"AvgAirDoseRate2011", "NE_nLat", "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat",
"SW_eLong", "SE_nLat", "SE_eLong")
names(niho2013_q) <- c("gridcode", "pref", "city", "gridCenterNorthlat", "gridCenterEastlng",
"gridCenterNorthlatDec", "gridCenterEastlngDec", "daichi_distance", "no_samples",
"AvgAirDoseRate2013", "NE_nLat", "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat",
"SW_eLong", "SE_nLat", "SE_eLong")
niho11_13 <- merge(niho_q, niho2013_q, by.x = "gridcode", by.y = "gridcode",
all = TRUE)
# Check if the merged columns are real identical
niho11_13 <- na.omit(niho11_13)
identical(niho11_13$gridCenterNorthlat.x, niho11_13$gridCenterNorthlat.y)
## [1] TRUE
identical(niho11_13$daichi_distance.x, niho11_13$daichi_distance.y)
## [1] TRUE
identical(niho11_13$no_samples.x, niho11_13$no_samples.y) #2011 and 2013 samples differ
## [1] FALSE
niho11_13 <- select(niho11_13, gridcode, pref.x, city.x, gridCenterNorthlat.x,
gridCenterEastlng.x, gridCenterNorthlatDec.x, gridCenterEastlngDec.x, daichi_distance.x,
no_samples.x, AvgAirDoseRate2011, NE_nLat.x, NE_eLong.x, NW_nLat.x, NW_eLong.x,
no_samples.y, AvgAirDoseRate2013)
# create new data set (niho11_13)
write_csv(niho11_13, path = "niho11_13.csv")
niho11_13 <- read_csv("niho11_13.csv")
# Compare AvgAirDoseRate of 2011&2013
plot(x = niho11_13$AvgAirDoseRate2011, type = "l", col = "red", ylab = "Avg Air Dose Rate",
xlab = "Frequency of measuring", main = "Compare AvgAirDoseRate of 2011&2013 Nihomatsu")
lines(niho11_13$AvgAirDoseRate2013, col = "green")
legend("topright", legend = c("AvgAirDoseRate2011", "AvgAirDoseRate2013"))
\[AvgAirDoseRate = \beta_{0} + \beta_{1} daichi distance + \epsilon_{i}\]
fit1 <- lm(AvgAirDoseRate2013~daichi_distance.x, data = niho11_13)
summary(fit1)
##
## Call:
## lm(formula = AvgAirDoseRate2013 ~ daichi_distance.x, data = niho11_13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27317 -0.10516 -0.02457 0.07873 0.89017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5844396 0.0200218 29.19 <2e-16 ***
## daichi_distance.x -0.0049126 0.0003721 -13.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1373 on 2805 degrees of freedom
## Multiple R-squared: 0.0585, Adjusted R-squared: 0.05817
## F-statistic: 174.3 on 1 and 2805 DF, p-value: < 2.2e-16
#Confidence interval
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) 0.545180562 0.623698590
## daichi_distance.x -0.005642192 -0.004182925
\[AvgAirDoseRate = \beta_{0} + \beta_{1} popn density + \beta_{2} human activities + \epsilon_{i}\]
ChallengeThe Air Dose Rate measurements were collected from three sources:
What constitutes Land Use?:
Population density and land usage is measured on a 500m² basis
The Fukushima Nuclear Radiations is multi dimensional;
The above dimensionalities coupled with distance,population density and land use, create a data set that can be run on extensive machine learning algorithms like Support Vector Machines, Random Forest, Recurrent Neural Networks and more.
end